/**  
* @Title: XunboCrawler.java
* @Package org.zsen.crawler.xunbo
* @Description Crawler entry point for the xiamp4.com site.
* @author ZhangSen
* @date 2015年11月2日 下午12:14:22
* @version 
*/ 
package org.zsen.crawler.xunbo;

import cn.edu.hfut.dmic.webcollector.crawler.MultiExtractorCrawler;

/**
 * Crawler for the xiamp4.com site, built on WebCollector's
 * {@link MultiExtractorCrawler}.
 *
 * <p>On construction it registers one seed URL, two URL regex rules and binds
 * {@code XunboExtractor} to the site's HTML pages. {@link #main(String[])}
 * runs it as a stand-alone program.
 *
 * @author ZhangSen
 */
public class XunboCrawler extends MultiExtractorCrawler {

	/** Site entry page registered as the only seed. */
	private static final String SITE_ROOT = "http://www.xiamp4.com/";

	/** Regex for the site's HTML pages; used both as a URL rule and as the extractor binding. */
	private static final String HTML_PAGE_REGEX = "http://www.xiamp4.com/.*html";

	/**
	 * Rule covering the {@code /play/} pages.
	 * NOTE(review): the leading '-' presumably marks this as an exclusion rule
	 * in WebCollector's regex-rule syntax - confirm against the library docs.
	 */
	private static final String PLAY_PAGE_RULE = "-(http://www.xiamp4.com/play/.*)";

	/** Crawl path passed to the constructor when launched through {@link #main(String[])}. */
	private static final String DEFAULT_CRAWL_PATH = "test";

	/** Value handed to {@code setThreads} by {@link #main(String[])}. */
	private static final int THREAD_COUNT = 8;

	/**
	 * Value handed to {@code start} by {@link #main(String[])}.
	 * NOTE(review): presumably the maximum crawl depth - confirm against WebCollector.
	 */
	private static final int START_ARGUMENT = 500;

	/**
	 * Creates the crawler and registers its seed, URL rules and extractor.
	 *
	 * @param crawlPath forwarded unchanged to the {@link MultiExtractorCrawler} constructor
	 * @param autoParse forwarded unchanged to the {@link MultiExtractorCrawler} constructor
	 */
	public XunboCrawler(String crawlPath, boolean autoParse) {
		super(crawlPath, autoParse);

		addSeed(SITE_ROOT);

		// Registration order is kept: HTML page rule first, then the /play/ rule.
		addRegex(HTML_PAGE_REGEX);
		addRegex(PLAY_PAGE_RULE);

		addExtractor(HTML_PAGE_REGEX, XunboExtractor.class);
	}

	/**
	 * Builds a crawler with crawl path {@code "test"} and auto-parse enabled,
	 * configures it (8 threads, resumable) and starts it with argument 500.
	 * Any exception raised while configuring or running is printed to standard
	 * error and not rethrown.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		final XunboCrawler xunbo = new XunboCrawler(DEFAULT_CRAWL_PATH, true);

		try {
			xunbo.setThreads(THREAD_COUNT);
			xunbo.setResumable(true);
			xunbo.start(START_ARGUMENT);
		} catch (Exception failure) {
			// Best-effort launcher: report the failure and let main return normally.
			failure.printStackTrace();
		}
	}
}